
import pandas as pd
import pickle

from src.model_details import hydropathy, conf_similarity
from src.utils import  get_data, filter_standard_amino_acid
from src.seq2featuresV1 import Transformer, GetModels

# Model artifacts are loaded once, at import time, from paths relative to the
# current working directory; importing this module fails if they are missing.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so these .pkl files must come from a trusted source.

# Object used as `scaler.transform(...)` in get_prediction
# (presumably a fitted standard scaler, judging by the file name — confirm).
with open('model/standard_scalar_model.pkl', 'rb') as f:
    scaler = pickle.load(f)

# Object used as `clf.predict(...)` in get_prediction
# (presumably a trained SVC classifier, judging by the file name — confirm).
with open('model/SVC_model.pkl','rb') as f:
    clf = pickle.load(f)

def transform_data(seq):
    """Build the feature representation for ``seq``.

    One model collection is fetched with ``GetModels.singles`` for each of
    the ``hydropathy`` and ``conf_similarity`` property sets, and the first
    entry of each collection is handed to a ``Transformer`` (no ProtVec
    model). The sequences are then passed to the transformer with an empty
    second argument (presumably the labels — confirm against
    ``Transformer.set_data``).

    Args:
        seq: Sequences to transform, forwarded unchanged to
            ``Transformer.set_data``.

    Returns:
        The transformer's ``xData`` attribute after ``set_data`` has run.
    """
    model_dir = 'model/'

    # Fetch both collections first, then pick the leading entry of each, so
    # that loading always completes before any indexing happens.
    collections = [
        GetModels.singles(model_dir, property_set)
        for property_set in (hydropathy, conf_similarity)
    ]
    selected_models = [collection[0] for collection in collections]

    featurizer = Transformer()
    featurizer.set_modelList(selected_models, ProtVec=None)
    featurizer.set_data(seq, [])

    return featurizer.xData

def get_prediction(fasta_file):
    """Predict a label for each sequence obtained from ``fasta_file``.

    The data returned by ``get_data`` is split by
    ``filter_standard_amino_acid`` into ``valid`` and ``invalid`` sequences.
    Valid sequences are featurised with ``transform_data``, scaled with the
    module-level ``scaler`` and classified with the module-level ``clf``.
    Invalid sequences are not classified; they are reported with a fixed
    explanatory message instead.

    Args:
        fasta_file: Input forwarded unchanged to ``get_data`` (presumably a
            FASTA file path or handle — confirm against ``get_data``).

    Returns:
        pandas.DataFrame with columns ``'Sequence'`` and ``'Prediction'``:
        the valid sequences with their predicted labels first, followed by
        the invalid sequences marked
        ``'contains non-standard amino acid'``. The index is renumbered
        from 0.
    """
    columns = ['Sequence', 'Prediction']

    df = get_data(fasta_file)
    valid, invalid = filter_standard_amino_acid(df)

    x_vec = transform_data(valid)
    x_vec = scaler.transform(x_vec)
    y_pred = clf.predict(x_vec)

    predicted = pd.DataFrame(list(zip(valid, y_pred)), columns=columns)
    rejected = pd.DataFrame(
        list(zip(invalid, ['contains non-standard amino acid'] * len(invalid))),
        columns=columns,
    )

    # DataFrame.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat with ignore_index=True is the documented equivalent.
    return pd.concat([predicted, rejected], ignore_index=True)
